from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from zhuaqu.items import DmozItem
import re
class DmozSpider(BaseSpider):
    """Spider that requests one yaozh.com query page and emits one item.

    The single URL in ``start_urls`` is downloaded and the response is
    handed to :meth:`parse`, which fills a ``DmozItem`` with the page
    title, a six-character slice of the response URL, and the content of
    the page's description META tag.
    """

    name = "dmoz"
    allowed_domains = ["yaozh.com"]
    start_urls = [
        # Query parameters must be separated by a plain "&".  The
        # HTML-escaped form "&amp;" is only meaningful inside HTML markup;
        # left in a real URL it turns the parameter names into
        # "amp;search" and "amp;name".
        # The ``name`` value is a percent-encoded UTF-8 search term.
        r"http://db.yaozh.com//index.php?action=yibao&search=search&name=%E5%A4%8D%E6%96%B9%E6%B0%A8%E7%BB%B4%E8%83%B6%E5%9B%8A",
    ]

    def parse(self, response):
        """Build a ``DmozItem`` from a downloaded page.

        Args:
            response: The response object for a URL from ``start_urls``.

        Returns:
            A ``DmozItem`` with the ``title``, ``yaoZhiID`` and ``intro``
            fields set.
        """
        hxs = HtmlXPathSelector(response)
        item = DmozItem()

        # List of the serialized <title> element(s) found in the page.
        item['title'] = hxs.xpath("//title").extract()

        # First six characters of the last "/"-separated URL segment.
        # NOTE(review): for the start URL above that segment begins with
        # "index.php?", so this evaluates to "index." unless the request
        # was redirected -- confirm this is really the intended ID.
        item['yaoZhiID'] = response.url.split("/")[-1][0:6]

        # Every ``content`` value captured from a description META tag
        # (the match is case-sensitive and attribute-order-sensitive).
        # NOTE(review): this applies a str pattern to response.body, which
        # presumably targets Python 2 where the body is a str; on a
        # bytes body this call would raise TypeError -- confirm runtime.
        item['intro'] = re.findall('<META name="description" content="(.*?)">', response.body)
        return item